Source code for hysop.tools.numba_utils

# Copyright (c) HySoP 2011-2024
#
# This file is part of HySoP software.
# See "https://particle_methods.gricad-pages.univ-grenoble-alpes.fr/hysop-doc/"
# for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import numpy as np
from hysop import __DEFAULT_NUMBA_TARGET__
from hysop.core.arrays.array import Array

try:
    import numba as nb
    from numba import prange

    HAS_NUMBA = True
except ImportError:
    HAS_NUMBA = False


[docs] def make_numba_signature(*args, **kwds): raise_on_cl_array = kwds.pop("raise_on_cl_array", True) if kwds: msg = "Unknown kwds {}.".forma(kwds.keys()) raise RuntimeError(kwds) dtype_to_ntype = { int: nb.int64, float: nb.float64, np.int8: nb.int8, np.int16: nb.int16, np.int32: nb.int32, np.int64: nb.int64, np.uint8: nb.uint8, np.uint16: nb.uint16, np.uint32: nb.uint32, np.uint64: nb.uint64, np.float32: nb.float32, np.float64: nb.float64, np.complex64: nb.complex64, np.complex128: nb.complex128, } sizes = ("m", "n", "p", "q", "r", "s") + tuple(f"n{i}" for i in range(10)) registered_sizes = {} def format_shape(*shape): res = "(" for i, s in enumerate(shape): if s in registered_sizes: sc = registered_sizes[s] else: sc = sizes[len(registered_sizes)] registered_sizes[s] = sc res += sc if i != len(shape) - 1: res += "," res += ")" return res numba_args = () numba_layout = () for i, a in enumerate(args): from hysop.backend.device.opencl import clArray if isinstance(a, Array): a = a.handle if isinstance(a, clArray.Array): # some opencl arrays can be mapped to host if raise_on_cl_array: msg = "Numba signature: Got a cl.Array or hysop.OpenClArray for argument {} (shape={}, dtype={})." msg = msg.format(i, a.shape, a.dtype) raise ValueError(msg) assert a.dtype.type in dtype_to_ntype, a.dtype.type dtype = dtype_to_ntype[a.dtype.type] if a.flags.c_contiguous: na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout="C") elif a.flags.f_contiguous: na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout="F") else: na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout="A") numba_layout += (format_shape(*a.shape),) elif isinstance(a, np.ndarray): assert a.dtype.type in dtype_to_ntype, a.dtype.type dtype = dtype_to_ntype[a.dtype.type] if a.flags.c_contiguous: na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout="C") elif a.flags.f_contiguous: na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout="F") else: na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout="A") numba_layout += (format_shape(*a.shape),) elif isinstance(a, np.dtype): a = dtype_to_ntype[a.type] numba_layout += ("()",) elif isinstance(a, type): na = dtype_to_ntype[a] numba_layout += ("()",) elif type(a) in dtype_to_ntype: na = dtype_to_ntype[type(a)] numba_layout += ("()",) else: msg = f"Uknown argument type {type(a).__mro__}." raise NotImplementedError(msg) numba_args += (na,) return nb.void(*numba_args), ",".join(numba_layout)
[docs] def bake_numba_copy(dst, src, target=None): if target is None: target = __DEFAULT_NUMBA_TARGET__ signature, layout = make_numba_signature(dst, src) if dst.ndim == 1: @nb.guvectorize([signature], layout, target=target, nopython=True, cache=True) def copy(dst, src): for i in range(0, dst.shape[0]): dst[i] = src[i] elif dst.ndim == 2: @nb.guvectorize([signature], layout, target=target, nopython=True, cache=True) def copy(dst, src): for i in prange(0, dst.shape[0]): for j in range(0, dst.shape[1]): dst[i, j] = src[i, j] elif dst.ndim == 3: @nb.guvectorize([signature], layout, target=target, nopython=True, cache=True) def copy(dst, src): for i in prange(0, dst.shape[0]): for j in prange(0, dst.shape[1]): for k in range(0, dst.shape[2]): dst[i, j, k] = src[i, j, k] elif dst.ndim == 4: @nb.guvectorize([signature], layout, target=target, nopython=True, cache=True) def copy(dst, src): for i in prange(0, dst.shape[0]): for j in prange(0, dst.shape[1]): for k in prange(0, dst.shape[2]): for l in range(0, dst.shape[3]): dst[i, j, k, l] = src[i, j, k, l] else: raise NotImplementedError(dst.ndim) def _exec_copy(copy=copy, dst=dst, src=src): copy(dst, src) return _exec_copy
[docs] def bake_numba_accumulate(dst, src, target=None): if target is None: target = __DEFAULT_NUMBA_TARGET__ signature, layout = make_numba_signature(dst, src) if dst.ndim == 1: @nb.guvectorize([signature], layout, target=target, nopython=True, cache=True) def accumulate(dst, src): for i in range(0, dst.shape[0]): dst[i] += src[i] elif dst.ndim == 2: @nb.guvectorize([signature], layout, target=target, nopython=True, cache=True) def accumulate(dst, src): for i in prange(0, dst.shape[0]): for j in range(0, dst.shape[1]): dst[i, j] += src[i, j] elif dst.ndim == 3: @nb.guvectorize([signature], layout, target=target, nopython=True, cache=True) def accumulate(dst, src): for i in prange(0, dst.shape[0]): for j in prange(0, dst.shape[1]): for k in range(0, dst.shape[2]): dst[i, j, k] += src[i, j, k] elif dst.ndim == 4: @nb.guvectorize([signature], layout, target=target, nopython=True, cache=True) def accumulate(dst, src): for i in prange(0, dst.shape[0]): for j in prange(0, dst.shape[1]): for k in prange(0, dst.shape[2]): for l in range(0, dst.shape[3]): dst[i, j, k, l] += src[i, j, k, l] else: raise NotImplementedError(dst.ndim) def _exec_accumulate(accumulate=accumulate, dst=dst, src=src): accumulate(dst, src) return _exec_accumulate
[docs] def bake_numba_transpose(src, dst, axes, target=None): # inefficient permutations if target is None: target = __DEFAULT_NUMBA_TARGET__ signature, layout = make_numba_signature(dst, src) assert src.ndim == dst.ndim assert dst.shape == tuple(src.shape[i] for i in axes) assert dst.dtype == src.dtype ndim = src.ndim def noop(dst, src): pass if ndim == 1: transpose = noop elif ndim == 2: if axes == (0, 1): transpose == noop elif axes == (1, 0): @nb.guvectorize( [signature], layout, target=target, nopython=True, cache=True ) def transpose(dst, src): for i in prange(0, src.shape[0]): for j in range(0, src.shape[1]): dst[j, i] = src[i, j] else: raise NotImplementedError elif ndim == 3: if axes == (0, 1, 2): transpose == noop elif axes == (0, 2, 1): @nb.guvectorize( [signature], layout, target=target, nopython=True, cache=True ) def transpose(dst, src): for i in prange(0, src.shape[0]): for j in prange(0, src.shape[1]): for k in range(0, src.shape[2]): dst[i, k, j] = src[i, j, k] elif axes == (1, 0, 2): @nb.guvectorize( [signature], layout, target=target, nopython=True, cache=True ) def transpose(dst, src): for i in prange(0, src.shape[0]): for j in prange(0, src.shape[1]): for k in range(0, src.shape[2]): dst[j, i, k] = src[i, j, k] elif axes == (1, 2, 0): @nb.guvectorize( [signature], layout, target=target, nopython=True, cache=True ) def transpose(dst, src): for i in prange(0, src.shape[0]): for j in prange(0, src.shape[1]): for k in range(0, src.shape[2]): dst[j, k, i] = src[i, j, k] elif axes == (2, 1, 0): @nb.guvectorize( [signature], layout, target=target, nopython=True, cache=True ) def transpose(dst, src): for i in prange(0, src.shape[0]): for j in prange(0, src.shape[1]): for k in range(0, src.shape[2]): dst[k, j, i] = src[i, j, k] elif axes == (2, 0, 1): @nb.guvectorize( [signature], layout, target=target, nopython=True, cache=True ) def transpose(dst, src): for i in prange(0, src.shape[0]): for j in prange(0, src.shape[1]): for k in range(0, src.shape[2]): dst[k, i, j] = src[i, j, k] else: raise NotImplementedError(axes) else: raise NotImplementedError(ndim) def _exec_transpose(transpose=transpose, dst=dst, src=src): transpose(dst, src) return _exec_transpose